Decontam

Decontam

Paths and libraries setting

# Load main packages, paths and custom functions
source("../../../source/main_packages.R")
source("../../../source/paths.R")
source("../../../source/functions.R")

# Load supplementary packages
packages <- c("decontam", "kableExtra", "microbiome", "cowplot")
invisible(lapply(packages, require, character.only = TRUE))

Import MED phyloseq object

setwd(path_rdata)
ps <- readRDS("MED_phyloseq.rds")

Detect contaminants

res <- make_decontam(ps, "1D_MED", path_tsv, threshold=0.6)
decontam_asv_MED <- res[[1]]
decontam_df_MED <- res[[2]]
plots_MED <- res[[3]] # 3 contaminants : Pseudomonas, Enhydrobacter, Rahnella1

# print contaminants
decontam_df_MED   %>% 
  kbl() %>%
  kable_paper("hover", full_width = F)
Kingdom Phylum Class Order Family Genus Species
N0005 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales Moraxellaceae Enhydrobacter NA
N0315 Bacteria Proteobacteria Gammaproteobacteria Enterobacterales Yersiniaceae Rahnella1 NA
N0852 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales Pseudomonadaceae Pseudomonas NA
# histogram of the scores
p <- hist(res[[4]]$p, 100, ylim = c(0,25), xlim = c(0,1), main="", xlab="Score", ylab="Frequency")

#print(p)

Save decontam plot

setwd(path_plot)
pdf("1D_MED_decontam_plots.pdf")
for (i in 1:length(plots_MED)) {
  print(plots_MED[[i]])
}
dev.off()
## quartz_off_screen 
##                 2

Remove contaminants

# remove contaminants ASV
alltaxa <- taxa_names(ps)
decontam_taxa <- alltaxa[!(alltaxa %in% decontam_asv_MED)]
ps2 <- prune_taxa(decontam_taxa, ps)

# check ps 
ps2 <- check_ps(ps2)
ps2
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 67 taxa and 123 samples ]
## sample_data() Sample Data:       [ 123 samples by 16 sample variables ]
## tax_table()   Taxonomy Table:    [ 67 taxa by 7 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 67 reference sequences ]
compute_read_counts(ps2)
## [1] 6469784
compute_read_counts(ps)-compute_read_counts(ps2)
## [1] 17460
ps <- ps2

Check blanks

# blanks
ps.blanks <- subset_samples(ps, Location=="Blank")
ps.blanks <- check_ps(ps.blanks)
ps.blanks
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 24 taxa and 8 samples ]
## sample_data() Sample Data:       [ 8 samples by 16 sample variables ]
## tax_table()   Taxonomy Table:    [ 24 taxa by 7 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 24 reference sequences ]
compute_read_counts(ps.blanks)
## [1] 16211
# supprimer blanks
ps.filter <- subset_samples(ps, Location!="Blank")
ps.filter <- check_ps(ps.filter)
ps.filter
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 67 taxa and 115 samples ]
## sample_data() Sample Data:       [ 115 samples by 16 sample variables ]
## tax_table()   Taxonomy Table:    [ 67 taxa by 7 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 67 reference sequences ]
compute_read_counts(ps.filter)
## [1] 6453573
# check nreads
summarize_phyloseq(ps.filter)
## Compositional = NO2
## 1] Min. number of reads = 952] Max. number of reads = 7361593] Total number of reads = 64535734] Average number of reads = 56118.02608695655] Median number of reads = 251717] Sparsity = 0.6486696950032456] Any OTU sum to 1 or less? NO8] Number of singletons = 09] Percent of OTUs that are singletons 
##         (i.e. exactly one read detected across all samples)010] Number of sample variables are: 16SampleWellPrimer1Primer2LocationFieldCountryOrganSpeciesIndividualIndividualsDateRunControlDnaRead_depth2
## [[1]]
## [1] "1] Min. number of reads = 95"
## 
## [[2]]
## [1] "2] Max. number of reads = 736159"
## 
## [[3]]
## [1] "3] Total number of reads = 6453573"
## 
## [[4]]
## [1] "4] Average number of reads = 56118.0260869565"
## 
## [[5]]
## [1] "5] Median number of reads = 25171"
## 
## [[6]]
## [1] "7] Sparsity = 0.648669695003245"
## 
## [[7]]
## [1] "6] Any OTU sum to 1 or less? NO"
## 
## [[8]]
## [1] "8] Number of singletons = 0"
## 
## [[9]]
## [1] "9] Percent of OTUs that are singletons \n        (i.e. exactly one read detected across all samples)0"
## 
## [[10]]
## [1] "10] Number of sample variables are: 16"
## 
## [[11]]
##  [1] "Sample"      "Well"        "Primer1"     "Primer2"     "Location"   
##  [6] "Field"       "Country"     "Organ"       "Species"     "Individual" 
## [11] "Individuals" "Date"        "Run"         "Control"     "Dna"        
## [16] "Read_depth"

Counts

Culex pipiens

# Culex pipiens
cat("ps_pipiens\n\n")
## ps_pipiens
ps.pipiens <- subset_samples(ps.filter, Species=="Culex pipiens")
ps.pipiens <- check_ps(ps.pipiens)
ps.pipiens
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 49 taxa and 83 samples ]
## sample_data() Sample Data:       [ 83 samples by 16 sample variables ]
## tax_table()   Taxonomy Table:    [ 49 taxa by 7 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 49 reference sequences ]
cat(paste0("read count: ", compute_read_counts(ps.pipiens), "\n\n"))
## read count: 2574603
ps.pipiens.field <- ps.pipiens %>% subset_samples(Field=="Field")
ps.pipiens.field <- ps.pipiens.field %>% check_ps()

# Culex pipiens - Bosc
cat("ps_pipiens_field_bosc\n\n")
## ps_pipiens_field_bosc
ps.pipiens.field.bosc <- ps.pipiens.field %>% subset_samples(Location=="Bosc")
ps.pipiens.field.bosc <- ps.pipiens.field.bosc %>% check_ps()
ps.pipiens.field.bosc
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 36 taxa and 23 samples ]
## sample_data() Sample Data:       [ 23 samples by 16 sample variables ]
## tax_table()   Taxonomy Table:    [ 36 taxa by 7 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 36 reference sequences ]
cat(paste0("read count: ", compute_read_counts(ps.pipiens.field.bosc), "\n\n"))
## read count: 803574
# Culex pipiens - Camping Europe 
cat("ps_pipiens_field_camping\n\n")
## ps_pipiens_field_camping
ps.pipiens.field.camping <- ps.pipiens.field %>% subset_samples(Location=="Camping Europe")
ps.pipiens.field.camping <- ps.pipiens.field.camping %>% check_ps()
ps.pipiens.field.camping
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 39 taxa and 25 samples ]
## sample_data() Sample Data:       [ 25 samples by 16 sample variables ]
## tax_table()   Taxonomy Table:    [ 39 taxa by 7 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 39 reference sequences ]
cat(paste0("read count: ", compute_read_counts(ps.pipiens.field.camping), "\n\n"))
## read count: 869617
# Culex pipiens - Lavar
cat("ps_pipiens_lavar\n\n")
## ps_pipiens_lavar
ps.pipiens.lavar <- ps.pipiens %>% subset_samples(Location=="Lavar (labo)")
ps.pipiens.lavar <- ps.pipiens.lavar %>% check_ps()
ps.pipiens.lavar
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 48 taxa and 35 samples ]
## sample_data() Sample Data:       [ 35 samples by 16 sample variables ]
## tax_table()   Taxonomy Table:    [ 48 taxa by 7 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 48 reference sequences ]
cat(paste0("read count: ", compute_read_counts(ps.pipiens.lavar), "\n\n"))
## read count: 901412

Culex quinquefasciatus

# Culex quinquefasciatus
cat("ps_quinque\n\n")
## ps_quinque
ps.quinque <- subset_samples(ps.filter, Species=="Culex quinquefasciatus")
ps.quinque <- check_ps(ps.quinque)
ps.quinque
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 59 taxa and 21 samples ]
## sample_data() Sample Data:       [ 21 samples by 16 sample variables ]
## tax_table()   Taxonomy Table:    [ 59 taxa by 7 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 59 reference sequences ]
cat(paste0("read count: ", compute_read_counts(ps.quinque), "\n\n"))
## read count: 1925054
# Culex quinquefasciatus - Field
cat("ps_quinque_field\n\n")
## ps_quinque_field
ps.quinque.field <- ps.quinque %>% subset_samples(Field=="Field")
ps.quinque.field <- ps.quinque.field %>% check_ps()
ps.quinque.field
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 47 taxa and 7 samples ]
## sample_data() Sample Data:       [ 7 samples by 16 sample variables ]
## tax_table()   Taxonomy Table:    [ 47 taxa by 7 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 47 reference sequences ]
cat(paste0("read count: ", compute_read_counts(ps.quinque.field), "\n\n"))
## read count: 1721053
# Culex quinquefasciatus - lab
cat("ps_quinque_lab\n\n")
## ps_quinque_lab
ps.quinque.lab <- ps.quinque %>% subset_samples(Field=="Lab ")
ps.quinque.lab <- ps.quinque.lab %>% check_ps()
ps.quinque.lab
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 45 taxa and 14 samples ]
## sample_data() Sample Data:       [ 14 samples by 16 sample variables ]
## tax_table()   Taxonomy Table:    [ 45 taxa by 7 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 45 reference sequences ]
cat(paste0("read count: ", compute_read_counts(ps.quinque.lab), "\n\n"))
## read count: 204001

Aedes aegypti

# Aedes aegypti
cat("ps_aedes\n\n")
## ps_aedes
ps.aedes <- subset_samples(ps.filter, Species=="Aedes aegypti")
ps.aedes <- check_ps(ps.aedes)
ps.aedes
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 54 taxa and 11 samples ]
## sample_data() Sample Data:       [ 11 samples by 16 sample variables ]
## tax_table()   Taxonomy Table:    [ 54 taxa by 7 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 54 reference sequences ]
cat(paste0("read count: ", compute_read_counts(ps.aedes), "\n\n"))
## read count: 1953916

Number of reads by sample

sample_data(ps.filter)$Read_depth <- sample_sums(ps.filter)
metadata_read <- data.frame(ps.filter@sam_data)

make.italic <- function(x) as.expression(lapply(x, function(y) bquote(italic(.(y)))))

levels(metadata_read$Species)= c("Aedes aegypti"=make.italic("Aedes aegypti"),
               "Culex pipiens"=make.italic("Culex pipiens"),
               "Culex quinquefasciatus"=make.italic("Culex quinquefasciatus"))

levels(metadata_read$Location) <- c("Bosc", "Camping~Europe", "Guadeloupe", "Lavar~(lab)", "Wolbachia~'-'~(Slab~TC)")

# metadata_read$f2 <- factor(metadata_read$Species, labels = c("italic(Aedesaegypti)", "italic(Culexpipiens)", "italic(Culexquinquefasciatus)"))

guide_italics <- guides(fill = guide_legend(label.theme = element_text(size = 10, face = "italic", colour = "Black", angle = 0)))

p <- ggplot(metadata_read, aes(x = Sample, y = Read_depth))+ 
  geom_bar(position = "dodge", stat = "identity")+
    scale_fill_manual(values = col)+
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90)) +
  ggtitle("") + 
  # guide_italics+
  theme(legend.title = element_text(size = 18), 
        legend.position="bottom",
        legend.text=element_text(size=11), 
        panel.spacing.y=unit(1, "lines"), 
        panel.spacing.x=unit(0.8, "lines"),
        panel.spacing=unit(0,"lines"),
        strip.background=element_rect(color="grey30", fill="grey90"),
        panel.border=element_rect(color="grey90"),
        axis.ticks.x=element_blank(),
        strip.text.x = element_text(size = 12)) +
 facet_wrap(~Species+Location+Organ, scales = "free_x", ncol=4, labeller=label_parsed)+
  labs(y="Sequence counts")+
  ylim(0, 550000)+
  geom_text(aes(label=Read_depth), position=position_dodge(width=0.5), size=2, hjust=-0.25, vjust=0.25, angle=90)+
  guides(fill=guide_legend(title="Oligotype", label.theme = element_text(size = 10, face = "italic", colour = "Black", angle = 0)))

p

Remove sample with number of reads <1000

a <- prune_samples(sample_sums(ps.filter)<=1000, ps.filter)
test <- data.frame(a@sam_data)
test <- test[test$Sample!="NP20" & test$Sample!="NP29" & test$Sample!="NP30" & test$Sample!="NP34" & test$Sample!="NP36",]
toremove <- test$Sample

ps.filter <- subset_samples(ps.filter, !(Sample %in% toremove))
ps.filter
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 67 taxa and 113 samples ]
## sample_data() Sample Data:       [ 113 samples by 16 sample variables ]
## tax_table()   Taxonomy Table:    [ 67 taxa by 7 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 67 reference sequences ]
compute_read_counts(ps.filter)
## [1] 6452623

Final number of reads by sample

sample_data(ps.filter)$Read_depth <- sample_sums(ps.filter)
metadata_read <- data.frame(ps.filter@sam_data)

levels(metadata_read$Species)= c("Aedes aegypti"=make.italic("Aedes aegypti"),
               "Culex pipiens"=make.italic("Culex pipiens"),
               "Culex quinquefasciatus"=make.italic("Culex quinquefasciatus"))

levels(metadata_read$Location) <- c("Bosc", "Camping~Europe", "Guadeloupe", "Lavar~(lab)", expression(paste(italic("Wolbachia"),"- (Slab TC)")))

metadata_read_whole <- metadata_read[metadata_read$Organ=="Whole",]
metadata_read_ovary <- metadata_read[metadata_read$Organ=="Ovary",]



guide_italics <- guides(fill = guide_legend(label.theme = element_text(size = 10, face = "italic", colour = "Black", angle = 0)))

p <- ggplot(metadata_read_whole, aes(x = Sample, y = Read_depth))+ 
  geom_bar(position = "dodge", stat = "identity")+
    scale_fill_manual(values = col)+
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, vjust=1, hjust=1, size=15)) +
  ggtitle("") + 
  guide_italics+
  theme(legend.title = element_text(size = 18), 
        legend.position="bottom",
        legend.text=element_text(size=16), 
        panel.spacing.y=unit(1, "lines"), 
        panel.spacing.x=unit(0.8, "lines"),
        panel.spacing=unit(0,"lines"),
        strip.background=element_rect(color="grey30", fill="grey90"),
        panel.border=element_rect(color="grey90"),
        axis.ticks.x=element_blank(),
        strip.text.x = element_text(size = 16),
        axis.text = element_text(size = 15)) +
 facet_wrap(~Species+Location+Organ, scales = "free_x", ncol=3, labeller=label_parsed)+
  labs(y="Sequence counts")+
  ylim(0, 1000000)+
  #geom_text(aes(label=Read_depth), position=position_dodge(width=0.5), size=4, hjust=-0.25, vjust=0.25, angle=90)+
  geom_text(aes(label=Read_depth), position=position_dodge(width=0.5), size=4, angle=90, hjust=-0.1, vjust=0.25)+
  guides(fill=guide_legend(title="Oligotype"))

p

p2 <- ggplot(metadata_read_ovary, aes(x = Sample, y = Read_depth))+ 
  geom_bar(position = "dodge", stat = "identity")+
    scale_fill_manual(values = col)+
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, vjust=1, hjust=1, size=15)) +
  ggtitle("") + 
  guide_italics+
  theme(legend.title = element_text(size = 18), 
        legend.position="bottom",
        legend.text=element_text(size=16), 
        panel.spacing.y=unit(1, "lines"), 
        panel.spacing.x=unit(0.8, "lines"),
        panel.spacing=unit(0,"lines"),
        strip.background=element_rect(color="grey30", fill="grey90"),
        panel.border=element_rect(color="grey90"),
        axis.ticks.x=element_blank(),
        strip.text.x = element_text(size = 16),
        axis.text = element_text(size = 15)) +
 facet_wrap(~Species+Location+Organ, scales = "free_x", ncol=3, labeller=label_parsed)+
  labs(y="Sequence counts")+
  ylim(0, 1000000)+
  geom_text(aes(label=Read_depth), position=position_dodge(width=0.5), size=4, angle=90, hjust=-0.1, vjust=0.25)+
  guides(fill=guide_legend(title="Oligotype"))

p2

# panels
p_group <- plot_grid(p+theme(legend.position="none"), 
          p2+theme(legend.position="none"), 
          nrow=2, 
          ncol=1)+
    draw_plot_label(c("A", "B"), c(0, 0), c(1, 0.5), size = 15)

legend_plot <- get_legend(p + theme(legend.position="bottom"))

p_counts <- plot_grid(p_group, legend_plot, nrow=2, ncol=1, rel_heights = c(1, .1))
p_counts

Final number of reads by Genus

new_names_genus <- c("Wolbachia",
               "Asaia",
               "Legionella",
               "Elizabethkingia",
               "Chryseobacterium",
               "Erwinia",
               "Morganella",
               "Pseudomonas",
               "Delftia",
               "Methylobacterium-Methylorubrum",
               "Serratia",
               "Coetzeea",
               "NA"
)

col_genus <- c("Wolbachia"="#FEB24C",
               "Asaia"="#10E015",
               "Legionella"="#DE3F23",
               "Elizabethkingia"="#66A7ED",
               "Chryseobacterium"="#F899FF",
               "Erwinia"="#FFE352",
               "Morganella"="#F5E4D3",
               "Pseudomonas"="#DBF5F0",
               "Delftia"="#C7C5B7",
               "Methylobacterium-Methylorubrum"="blue",
               "Serratia"="#B136F5",
               "Coetzeea"="red",
               "NA"="grey")


p <- plot_bar(ps.filter, "Genus", "Abundance", "Genus")+
  geom_bar(aes(color=Genus, fill=Genus), stat="identity", position="stack")+
  scale_fill_manual(values = col_genus)+
  labs(x="Genus", y="Number of read") +
 facet_wrap(~sample_Species+Location+Organ, scales = "free", ncol=3)
## Warning in psmelt(physeq): The sample variables: 
## Sample
##  have been renamed to: 
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
## Warning in psmelt(physeq): The sample variables: 
## Species
##  have been renamed to: 
## sample_Species
## to avoid conflicts with taxonomic rank names.
p

data <- p$data

labels = c("Wolbachia"=make.italic("Wolbachia"),
                      "Asaia"=make.italic("Asaia"),
                      "Legionella"=make.italic("Legionella"),
                      "Elizabethkingia"=make.italic("Elizabethkingia"),
                      "Chryseobacterium"=make.italic("Chryseobacterium"),
                      "Erwinia"=make.italic("Erwinia"),
                      "Morganella"=make.italic("Morganella"),
                      "Pseudomonas"=make.italic("Pseudomonas"),
                      "Delftia"=make.italic("Delftia"),
                      "Methylobacterium-Methylorubrum"=make.italic("Methylobacterium-Methylorubrum"),
                      "Serratia"=make.italic("Serratia"),
                      "Coetzeea"=make.italic("Coetzeea"),
                      "NA"
)


data_pipiens_whole <- data[data$sample_Species=="Culex pipiens" & data$Organ=="Whole",]
data_pipiens_ovary <- data[data$sample_Species=="Culex pipiens" & data$Organ=="Ovary",]

data_quinque_whole <- data[data$sample_Species=="Culex quinquefasciatus" & data$Organ=="Whole",]
data_quinque_ovary <- data[data$sample_Species=="Culex quinquefasciatus" & data$Organ=="Ovary",]

data_aedes_whole <- data[data$sample_Species=="Aedes aegypti" & data$Organ=="Whole",]
data_aedes_ovary <- data[data$sample_Species=="Aedes aegypti" & data$Organ=="Ovary",]

# pipiens
## whole
### all
pipiens1 <- percent_taxonomic_plot_test(data=data_pipiens_whole, variable=sample_Species, selection="Culex pipiens", group="Genus", organ="Whole", new_names=new_names_genus, col=col_genus, mylabels=labels)+ 
  labs(tag = "(W)")+
  theme(plot.tag.position = "topright")
## `summarise()` regrouping output by 'sample_Species' (override with `.groups` argument)
### field
pipiens2 <- percent_taxonomic_plot_test(data=data_pipiens_whole, variable=Location, selection="Bosc", group="Genus", organ="Whole", new_names=new_names_genus, col=col_genus, mylabels=labels)+ 
  labs(tag = "(W)")+
  theme(plot.tag.position = "topright")
## `summarise()` regrouping output by 'Location' (override with `.groups` argument)
## Warning: Unknown or uninitialised column: `sample_Species`.
## Warning: Unknown or uninitialised column: `Organ`.
pipiens3 <- percent_taxonomic_plot_test(data=data_pipiens_whole, variable=Location, selection="Camping Europe", group="Genus", organ="Whole", new_names=new_names_genus, col=col_genus, mylabels=labels)+ 
  labs(tag = "(W)")+
  theme(plot.tag.position = "topright")
## `summarise()` regrouping output by 'Location' (override with `.groups` argument)
## Warning: Unknown or uninitialised column: `sample_Species`.

## Warning: Unknown or uninitialised column: `Organ`.
### labo
pipiens4 <- percent_taxonomic_plot_test(data=data_pipiens_whole, variable=Location, selection="Lavar (labo)", group="Genus", organ="Whole", new_names=new_names_genus, col=col_genus, mylabels=labels)+ 
  labs(tag = "(W)")+
  theme(plot.tag.position = "topright")
## `summarise()` regrouping output by 'Location' (override with `.groups` argument)
## Warning: Unknown or uninitialised column: `sample_Species`.

## Warning: Unknown or uninitialised column: `Organ`.
## ovary
## all
pipiens5 <- percent_taxonomic_plot_test(data=data_pipiens_ovary, variable=sample_Species, selection="Culex pipiens", group="Genus", organ="Ovary", new_names=new_names_genus, col=col_genus, mylabels=labels)+ 
  labs(tag = "(O)")+
  theme(plot.tag.position = "topright")
## `summarise()` regrouping output by 'sample_Species' (override with `.groups` argument)
# quinque
## whole
### all
quinque1 <- percent_taxonomic_plot_test(data=data_quinque_whole, variable=sample_Species, selection="Culex quinquefasciatus", group="Genus", organ="Whole", new_names=new_names_genus, col=col_genus, mylabels=labels)+ 
  labs(tag = "(W)")+
  theme(plot.tag.position = "topright")
## `summarise()` regrouping output by 'sample_Species' (override with `.groups` argument)
### field
quinque2 <- percent_taxonomic_plot_test(data=data_quinque_whole, variable=Location, selection="Guadeloupe", group="Genus", organ="Whole", new_names=new_names_genus, col=col_genus, mylabels=labels)+ 
  labs(tag = "(W)")+
  theme(plot.tag.position = "topright")
## `summarise()` regrouping output by 'Location' (override with `.groups` argument)
## Warning: Unknown or uninitialised column: `sample_Species`.

## Warning: Unknown or uninitialised column: `Organ`.
### labo
quinque3 <- percent_taxonomic_plot_test(data=data_quinque_whole, variable=Location, selection="Wolbachia -",group="Genus", organ="Whole", new_names=new_names_genus, col=col_genus, mylabels=labels)+ 
  labs(tag = "(W)")+
  theme(plot.tag.position = "topright")
## `summarise()` regrouping output by 'Location' (override with `.groups` argument)
## Warning: Unknown or uninitialised column: `sample_Species`.

## Warning: Unknown or uninitialised column: `Organ`.
quinque3[["labels"]][["title"]] <- expression(paste(italic("Wolbachia"), "- (Slab TC)"))

## ovary
## all
quinque4 <- percent_taxonomic_plot_test(data=data_quinque_ovary, variable=sample_Species, selection="Culex quinquefasciatus",group="Genus", organ="Ovary", new_names=new_names_genus, col=col_genus, mylabels=labels)+ 
  labs(tag = "(O)")+
  theme(plot.tag.position = "topright")
## `summarise()` regrouping output by 'sample_Species' (override with `.groups` argument)
# aedes
## whole
### all
aedes1 <- percent_taxonomic_plot_test(data=data_aedes_whole, variable=sample_Species, selection="Aedes aegypti",group="Genus", organ="Whole", new_names=new_names_genus, col=col_genus, mylabels=labels)+ 
  labs(tag = "(W)")+
  theme(plot.tag.position = "topright")
## `summarise()` regrouping output by 'sample_Species' (override with `.groups` argument)
## ovary
## all
aedes2 <- percent_taxonomic_plot_test(data=data_aedes_ovary, variable=sample_Species, selection="Aedes aegypti", group="Genus", organ="Ovary", new_names=new_names_genus, col=col_genus, mylabels=labels)+ 
  labs(tag = "(O)")+
  theme(plot.tag.position = "topright")
## `summarise()` regrouping output by 'sample_Species' (override with `.groups` argument)
p_genus <- plot_grid(pipiens1+ theme(legend.position="none"), 
                     pipiens2+ theme(legend.position="none"), 
                     pipiens3+ theme(legend.position="none"), 
                     pipiens4+ theme(legend.position="none"), 
                     pipiens5+ theme(legend.position="none"),
                     quinque1+ theme(legend.position="none"), 
                     quinque2+ theme(legend.position="none"), 
                     quinque3+ theme(legend.position="none"), 
                     quinque4+ theme(legend.position="none"), 
                     plot.new(),
                     aedes1+ theme(legend.position="none"), 
                     aedes2+ theme(legend.position="none"), 
                     plot.new(),
                     plot.new(),
                     plot.new(),
                     nrow=3, 
                     ncol=5)+
  draw_plot_label(c("A", "B", "C"), c(0, 0, 0), c(1, 2/3, 1/3), size = 15)

p_genus

Counts after removing samples

Create a dataframe

x <- c("Culex pipiens", "Bosc", "Camping Europe", "Lavar (lab)", 
       "Culex quinquefasciatus", "AGuadeloupe", "Wolbachia - (Slab TC)",
       "Aedes aegypti", "BGuadeloupe",
       "Total")
y <- c("Reads", "Oligotypes", "Samples")

df <- data.frame(matrix(ncol=3, nrow=10))
rownames(df) <- x
colnames(df) <- y

df2 <- df
df3 <- df

Whole + Ovary

# Culex pipiens
ps.pipiens <- subset_samples(ps.filter, Species=="Culex pipiens")
ps.pipiens <- check_ps(ps.pipiens)

## All location
### oligotype
nrow(ps.pipiens@otu_table) -> df["Culex pipiens", "Oligotypes"]
### samples
nrow(ps.pipiens@sam_data) -> df["Culex pipiens", "Samples"]
### reads
compute_read_counts(ps.pipiens) -> df["Culex pipiens", "Reads"]


## Bosc
### oligotype
nrow(otu_table(ps.pipiens %>% 
                 subset_samples(Location=="Bosc") %>% 
                 check_ps())) -> df["Bosc", "Oligotypes"]
### samples
nrow(ps.pipiens@sam_data %>% subset_samples(Location=="Bosc")) -> df["Bosc", "Samples"]
### reads
compute_read_counts(ps.pipiens %>% subset_samples(Location=="Bosc")) -> df["Bosc", "Reads"]


## Camping Europe
### oligotype
nrow(otu_table(ps.pipiens %>% 
                 subset_samples(Location=="Camping Europe") %>% 
                 check_ps())) -> df["Camping Europe", "Oligotypes"]
### samples
nrow(ps.pipiens@sam_data %>% subset_samples(Location=="Camping Europe")) -> df["Camping Europe", "Samples"]
### reads
compute_read_counts(ps.pipiens %>% subset_samples(Location=="Camping Europe")) -> df["Camping Europe", "Reads"]

## Lavar (labo)
### oligotype
nrow(otu_table(ps.pipiens %>% 
                 subset_samples(Location=="Lavar (labo)") %>% 
                 check_ps())) -> df["Lavar (lab)", "Oligotypes"]
### samples
nrow(ps.pipiens@sam_data %>% subset_samples(Location=="Lavar (labo)")) -> df["Lavar (lab)", "Samples"]
### reads
compute_read_counts(ps.pipiens %>% subset_samples(Location=="Lavar (labo)")) -> df["Lavar (lab)", "Reads"]


# Culex quinquefasciatus
ps.quinque <- subset_samples(ps.filter, Species=="Culex quinquefasciatus")
ps.quinque <- check_ps(ps.quinque)

## All location
### oligotype
nrow(ps.quinque@otu_table) -> df["Culex quinquefasciatus", "Oligotypes"]
### samples
nrow(ps.quinque@sam_data) -> df["Culex quinquefasciatus", "Samples"]
### reads
compute_read_counts(ps.quinque) -> df["Culex quinquefasciatus", "Reads"]


## Guadeloupe
### oligotype
nrow(otu_table(ps.quinque %>% 
                 subset_samples(Location=="Guadeloupe") %>% 
                 check_ps())) -> df["AGuadeloupe", "Oligotypes"]
### samples
nrow(ps.quinque@sam_data %>% subset_samples(Location=="Guadeloupe")) -> df["AGuadeloupe", "Samples"]
### reads
compute_read_counts(ps.quinque %>% subset_samples(Location=="Guadeloupe")) -> df["AGuadeloupe", "Reads"]


## Wolbachia -
### oligotype
nrow(otu_table(ps.quinque %>% 
                 subset_samples(Location=="Wolbachia -") %>% 
                 check_ps())) -> df["Wolbachia - (Slab TC)", "Oligotypes"]
### samples
nrow(ps.quinque@sam_data %>% subset_samples(Location=="Wolbachia -")) -> df["Wolbachia - (Slab TC)", "Samples"]
### reads
compute_read_counts(ps.quinque %>% subset_samples(Location=="Wolbachia -")) -> df["Wolbachia - (Slab TC)", "Reads"]


# Aedes aegypti
ps.aedes <- subset_samples(ps.filter, Species=="Aedes aegypti")
ps.aedes <- check_ps(ps.aedes)

## Guadeloupe
### oligotype
nrow(otu_table(ps.aedes %>% 
                 subset_samples(Location=="Guadeloupe") %>% 
                 check_ps())) -> df[c("BGuadeloupe","Aedes aegypti"), "Oligotypes"]
### samples
nrow(ps.aedes@sam_data %>% subset_samples(Location=="Guadeloupe")) -> df[c("BGuadeloupe","Aedes aegypti"), "Samples"]
### reads
compute_read_counts(ps.aedes %>% subset_samples(Location=="Guadeloupe")) -> df[c("BGuadeloupe","Aedes aegypti"), "Reads"]

# Total
### oligotype
nrow(ps.filter@otu_table) -> df["Total", "Oligotypes"]
### samples
nrow(ps.filter@sam_data) -> df["Total", "Samples"]
### reads
compute_read_counts(ps.filter) -> df["Total", "Reads"]

Whole

# Culex pipiens
ps.pipiens.whole <- ps.pipiens %>% subset_samples(Organ=="Whole")
ps.pipiens.whole <- ps.pipiens.whole %>% check_ps()

## All location
### oligotype
nrow(ps.pipiens.whole@otu_table) -> df2["Culex pipiens", "Oligotypes"]
### samples
nrow(ps.pipiens.whole@sam_data) -> df2["Culex pipiens", "Samples"]
### reads
compute_read_counts(ps.pipiens.whole) -> df2["Culex pipiens", "Reads"]


## Bosc
### oligotype
nrow(otu_table(ps.pipiens.whole %>% 
                 subset_samples(Location=="Bosc") %>% 
                 check_ps())) -> df2["Bosc", "Oligotypes"]
### samples
nrow(ps.pipiens.whole@sam_data %>% subset_samples(Location=="Bosc")) -> df2["Bosc", "Samples"]
### reads
compute_read_counts(ps.pipiens.whole %>% subset_samples(Location=="Bosc")) -> df2["Bosc", "Reads"]


## Camping Europe
### oligotype
nrow(otu_table(ps.pipiens.whole %>% 
                 subset_samples(Location=="Camping Europe") %>% 
                 check_ps())) -> df2["Camping Europe", "Oligotypes"]
### samples
nrow(ps.pipiens.whole@sam_data %>% subset_samples(Location=="Camping Europe")) -> df2["Camping Europe", "Samples"]
### reads
compute_read_counts(ps.pipiens.whole %>% subset_samples(Location=="Camping Europe")) -> df2["Camping Europe", "Reads"]

## Lavar (labo)
### oligotype
nrow(otu_table(ps.pipiens.whole %>% 
                 subset_samples(Location=="Lavar (labo)") %>% 
                 check_ps())) -> df2["Lavar (lab)", "Oligotypes"]
### samples
nrow(ps.pipiens.whole@sam_data %>% subset_samples(Location=="Lavar (labo)")) -> df2["Lavar (lab)", "Samples"]
### reads
compute_read_counts(ps.pipiens.whole %>% subset_samples(Location=="Lavar (labo)")) -> df2["Lavar (lab)", "Reads"]



# Culex quinquefasciatus
ps.quinque.whole <- subset_samples(ps.quinque, Organ=="Whole")
ps.quinque.whole <- check_ps(ps.quinque.whole)

## All location
### oligotype
nrow(ps.quinque.whole@otu_table) -> df2["Culex quinquefasciatus", "Oligotypes"]
### samples
nrow(ps.quinque.whole@sam_data) -> df2["Culex quinquefasciatus", "Samples"]
### reads
compute_read_counts(ps.quinque.whole) -> df2["Culex quinquefasciatus", "Reads"]


## Guadeloupe
### oligotype
nrow(otu_table(ps.quinque.whole %>% 
                 subset_samples(Location=="Guadeloupe") %>% 
                 check_ps())) -> df2["AGuadeloupe", "Oligotypes"]
### samples
nrow(ps.quinque.whole@sam_data %>% subset_samples(Location=="Guadeloupe")) -> df2["AGuadeloupe", "Samples"]
### reads
compute_read_counts(ps.quinque.whole %>% subset_samples(Location=="Guadeloupe")) -> df2["AGuadeloupe", "Reads"]


## Wolbachia -
### oligotype
nrow(otu_table(ps.quinque.whole %>% 
                 subset_samples(Location=="Wolbachia -") %>% 
                 check_ps())) -> df2["Wolbachia - (Slab TC)", "Oligotypes"]
### samples
nrow(ps.quinque.whole@sam_data %>% subset_samples(Location=="Wolbachia -")) -> df2["Wolbachia - (Slab TC)", "Samples"]
### reads
compute_read_counts(ps.quinque.whole %>% subset_samples(Location=="Wolbachia -")) -> df2["Wolbachia - (Slab TC)", "Reads"]



# Aedes aegypti
ps.aedes.whole <- subset_samples(ps.aedes, Organ=="Whole")
ps.aedes.whole <- check_ps(ps.aedes.whole)

## Guadeloupe
### oligotype
nrow(otu_table(ps.aedes.whole %>% 
                 subset_samples(Location=="Guadeloupe") %>% 
                 check_ps())) -> df2[c("BGuadeloupe","Aedes aegypti"), "Oligotypes"]
### samples
nrow(ps.aedes.whole@sam_data %>% subset_samples(Location=="Guadeloupe")) -> df2[c("BGuadeloupe","Aedes aegypti"), "Samples"]
### reads
compute_read_counts(ps.aedes.whole %>% subset_samples(Location=="Guadeloupe")) -> df2[c("BGuadeloupe","Aedes aegypti"), "Reads"]


# Total
ps.whole <- ps.filter %>% subset_samples(Organ=="Whole")
ps.whole <- ps.whole %>% check_ps()
### oligotype
nrow(ps.whole@otu_table) -> df2["Total", "Oligotypes"]
### samples
nrow(ps.whole@sam_data) -> df2["Total", "Samples"]
### reads
compute_read_counts(ps.whole) -> df2["Total", "Reads"]

Ovary

# Culex pipiens
ps.pipiens.ovary <- ps.pipiens %>% subset_samples(Organ=="Ovary")
ps.pipiens.ovary <- ps.pipiens.ovary %>% check_ps()

## All location
### oligotype
nrow(ps.pipiens.ovary@otu_table) -> df3["Culex pipiens", "Oligotypes"]
### samples
nrow(ps.pipiens.ovary@sam_data) -> df3["Culex pipiens", "Samples"]
### reads
compute_read_counts(ps.pipiens.ovary) -> df3["Culex pipiens", "Reads"]


## Bosc
### oligotype
nrow(otu_table(ps.pipiens.ovary %>% 
                 subset_samples(Location=="Bosc") %>% 
                 check_ps())) -> df3["Bosc", "Oligotypes"]
### samples
nrow(ps.pipiens.ovary@sam_data %>% subset_samples(Location=="Bosc")) -> df3["Bosc", "Samples"]
### reads
compute_read_counts(ps.pipiens.ovary %>% subset_samples(Location=="Bosc")) -> df3["Bosc", "Reads"]


## Camping Europe
### oligotype
nrow(otu_table(ps.pipiens.ovary %>% 
                 subset_samples(Location=="Camping Europe") %>% 
                 check_ps())) -> df3["Camping Europe", "Oligotypes"]
### samples
nrow(ps.pipiens.ovary@sam_data %>% subset_samples(Location=="Camping Europe")) -> df3["Camping Europe", "Samples"]
### reads
compute_read_counts(ps.pipiens.ovary %>% subset_samples(Location=="Camping Europe")) -> df3["Camping Europe", "Reads"]

## Lavar (labo)
### oligotype
nrow(otu_table(ps.pipiens.ovary %>% 
                 subset_samples(Location=="Lavar (labo)") %>% 
                 check_ps())) -> df3["Lavar (lab)", "Oligotypes"]
### samples
nrow(ps.pipiens.ovary@sam_data %>% subset_samples(Location=="Lavar (labo)")) -> df3["Lavar (lab)", "Samples"]
### reads
compute_read_counts(ps.pipiens.ovary %>% subset_samples(Location=="Lavar (labo)")) -> df3["Lavar (lab)", "Reads"]



# Culex quinquefasciatus
ps.quinque.ovary <- subset_samples(ps.quinque, Organ=="Ovary")
ps.quinque.ovary <- check_ps(ps.quinque.ovary)

## All location
### oligotype
nrow(ps.quinque.ovary@otu_table) -> df3["Culex quinquefasciatus", "Oligotypes"]
### samples
nrow(ps.quinque.ovary@sam_data) -> df3["Culex quinquefasciatus", "Samples"]
### reads
compute_read_counts(ps.quinque.ovary) -> df3["Culex quinquefasciatus", "Reads"]


## Guadeloupe
### oligotype
nrow(otu_table(ps.quinque.ovary %>% 
                 subset_samples(Location=="Guadeloupe") %>% 
                 check_ps())) -> df3["AGuadeloupe", "Oligotypes"]
### samples
nrow(ps.quinque.ovary@sam_data %>% subset_samples(Location=="Guadeloupe")) -> df3["AGuadeloupe", "Samples"]
### reads
compute_read_counts(ps.quinque.ovary %>% subset_samples(Location=="Guadeloupe")) -> df3["AGuadeloupe", "Reads"]


## Wolbachia -
### oligotype
# nrow(otu_table(ps.quinque.ovary %>% 
#                  subset_samples(Location=="Wolbachia -") %>% 
#                  check_ps())) -> df3["Wolbachia - (Slab TC)", "Oligotypes"]
### samples
# nrow(ps.quinque.ovary@sam_data %>% subset_samples(Location=="Wolbachia -")) -> df3["Wolbachia - (Slab TC)", "Samples"]
# ### reads
# compute_read_counts(ps.quinque.ovary %>% subset_samples(Location=="Wolbachia -")) -> df3["Wolbachia - (Slab TC)", "Reads"]



# Aedes aegypti
ps.aedes.ovary <- subset_samples(ps.aedes, Organ=="Ovary")
ps.aedes.ovary <- check_ps(ps.aedes.ovary)

## Guadeloupe
### oligotype
nrow(otu_table(ps.aedes.ovary %>% 
                 subset_samples(Location=="Guadeloupe") %>% 
                 check_ps())) -> df3[c("BGuadeloupe","Aedes aegypti"), "Oligotypes"]
### samples
nrow(ps.aedes.ovary@sam_data %>% subset_samples(Location=="Guadeloupe")) -> df3[c("BGuadeloupe","Aedes aegypti"), "Samples"]
### reads
compute_read_counts(ps.aedes.ovary %>% subset_samples(Location=="Guadeloupe")) -> df3[c("BGuadeloupe","Aedes aegypti"), "Reads"]


# Total
ps.ovary <- ps.filter %>% subset_samples(Organ=="Ovary")
ps.ovary <- ps.ovary %>% check_ps()
### oligotype
nrow(ps.ovary@otu_table) -> df3["Total", "Oligotypes"]
### samples
nrow(ps.ovary@sam_data) -> df3["Total", "Samples"]
### reads
compute_read_counts(ps.ovary) -> df3["Total", "Reads"]


df3[is.na(df3)] <- 0

Save

setwd(path_tsv)
write.table(df, "1D_Counts_all.tsv", sep="\t", row.names = TRUE, col.names=NA)
write.table(df2, "1D_Counts_whole.tsv", sep="\t", row.names = TRUE, col.names=NA)
write.table(df3, "1D_Counts_ovary.tsv", sep="\t", row.names = TRUE, col.names=NA)

Change factors format for organ and species in ps.filter

change_organ(ps.filter)
change_species(ps.filter)

Save phyloseq object after decontam

setwd(path_rdata)
saveRDS(ps.filter, "1D_MED_phyloseq_decontam.rds")

setwd(path_plot)

tiff("1D_counts_by_sample.tiff", units="in", width=20, height=18, res=300)
p_counts
dev.off()
## quartz_off_screen 
##                 2
tiff("1D_counts_by_genus.tiff", units="in", width=25, height=18, res=300)
p_genus
dev.off()
## quartz_off_screen 
##                 2